\documentclass[paper=a4, fontsize=11pt]{scrartcl}
\newcommand{\bfvec}[1]{\mbox{\boldmath$#1$}}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{charter}
\usepackage{amsmath}
\usepackage[top=0.75in, bottom=1in, left=1in, right=1in]{geometry}
\title{	
\normalfont \normalsize 
\textsc{Shanghai Jiaotong University, Zhiyuan College} \\ [25pt]
\huge Homework 3 \\
}
\author{Feng Shi}
\date{\normalsize\today}
\begin{document}
\maketitle
\begin{enumerate}
\item[Ex 2.29]
\begin{enumerate}

\item[-]
{\Large \textbf{Solution 1:}}
\begin{enumerate}
\item[1.]
The surface area of a $d-$dimensional unit cube is the sum of surface areas of its 
sides, which are $(d-1)-$dimensional volumes. And the $d-$dimensional cube has $2d$
sides. So a $d-$dimensional unit cube has surface area of $2d$.

\item[2.]
Consider generating uniformly random points on the surface of the $d-$dimensional cube
and calculate the probability of the points lying near the equator defined above. What
I want to prove is that the distance from the random points to the hyperplane is
almost zero.

Since a $d-$dimensional cube has $2d$ sides of $(d-1)-$dimension,
a randomly chosen point on the surface of the $d-$dimensional cube has a probablity
of $\frac{1}{2d}$ lying on the side 
$$\lbrace(x_{1},x_{2},...,x_{i}=0,...,x_{d}) | 0\leq x_{j}\leq 1,j\in[1,d]\backslash i\rbrace$$
Consider generating uniformly random points on this side. This is just generating
each coordinate randomly on $[0,1]$ since it's a $(d-1)-$dimensional cube. 
So each coordinate has an expectation of $\frac{1}{2}$ and the expectation of the
sum of every coordinate is $\frac{d-1}{2}$ by linearity of expectation (and each
coordinate is independent of course).

By symmetry there is a side
$$\lbrace(x_{1},x_{2},...,x_{i}=1,...,x_{d}) | 0\leq x_{j}\leq 1,j\in[1,d]\backslash i\rbrace$$
and the expectation of sum of the coordinates of a randomly generated points on 
this surface is $\frac{d+1}{2}$. This yields an expectation of sum of coordinates on
a pair of sides of $\frac{d}{2}$.
Let random variable $\bfvec{X}$ denote $\sum_{i=1}^{d}x_{i}$
So the expectation of sum of coordinates of a point on the surface 
of a $d-$dimensional unit cube is 
$$E(\bfvec{X})=E(\sum_{i=1}^{d}x_{i})=\frac{d}{2}\quad \mbox{$\bfvec{x}$ is on the surface}$$ 
To calculate the variance, change the coordinate system and make origin the center of the cube,
then each coordinate has range $[-\frac{1}{2},\frac{1}{2}]$.
Changing the coordinate system (i.e. changing the expected value) won't change the variance.
Denote the cube with $C^{d}$.
\begin{align*}
	\sigma(\bfvec{X})&=\int_{C^{d}}(\bfvec{X}-\frac{d}{2})^2 dS \\
					&=\int_{C^{d}}(\sum_{i=1}^{d}x_{i}^{2}+
					\sum_{i=1}^{d}\sum_{j=1,i\neq j}^{d}x_{i}x_{j})dS \\
\end{align*}
Since
$$\int_{-\frac{1}{2}}^{\frac{1}{2}}x_{i}x_{j}dx_{i}=0 \quad \textrm{$i\neq j$}$$

\begin{align*}
	\sigma{\bfvec{X}}&=\int_{C^{d}}\sum_{i=1}^{d}x_{i}^{2}dx_{i} \\
					&=d\int_{C^{d}}x_{i}^{2}dx_{i} \\
					&=d\int_{-\frac{1}{2}}^{\frac{1}{2}}\int_{-\frac{1}{2}}^{\frac{1}{2}}...
					\int_{-\frac{1}{2}}^{\frac{1}{2}}x_{i}^{2}dx_{1}dx_{2}...dx_{d} \\
					&=\frac{d}{12}
\end{align*}
According to Chebyshev's inequality
$$p(|\bfvec{X}-\frac{d}{2}|\geq\epsilon)\leq\frac{\frac{d}{12}}{\epsilon^2}
=\frac{d}{12\epsilon^{2}}$$
Now we denote the axis through the center of the cube and perpendicular to the equator
the $o-axis$, and we take the ``thickness'' of the equatorial slice half of the length
of the $o-axis$, namely $\sqrt{d}$, and prove that the probablity of the 
randomly generated point lying outside the slice goes to zero as $d\to\infty$.
(in the below we prove in half of the cube "above" the equator)

Denote a randomly generator point $\bfvec{x}$, the hyperplane parrallel to the
equator and with $\bfvec{x}$ on it $\Pi$. Then when the distance between the equator 
and $\Pi$ is $\frac{\sqrt{d}}{2}$, the distance of $\bfvec{x}$ from the equator is
actually $$\Delta d=\frac{|\bfvec{x}\cdot\bfvec{n}|}{|\bfvec{n}|}=\frac{|X-\frac{d}{2}|}{\sqrt{d}}$$
$$|\bfvec{X}-\frac{d}{2}|=\sqrt{d} \Delta d $$
$$\sigma(\Delta d)=\frac{\sigma(\bfvec{X})}{(\sqrt{d})^2}=\frac{1}{12}$$
That says, when $\bfvec{X}$ is $\epsilon$ from its expected value $\frac{d}{2}$, the
distance between the two hyperspaces is actually $\frac{\epsilon}{\sqrt{d}}$.
Let $\epsilon=c\frac{d}{2}$, then
$$p(|\bfvec{X}-\frac{d}{2}|\geq c\frac{d}{2})\leq\frac{1}{c^2d}$$
also
$$p(|\Delta d|>c\sqrt{d})\leq \frac{1}{c\sqrt{d}}$$
where c could be very small for the "equatorial slice" to be thin 
with respect to the length of $o-aixs$.
We can conclude that, when d is large, most of the points
generated randomly lie near the equator given by
$$\lbrace \bfvec{x}:\sum_{i=1}^{d}x_{i}=\frac{d}{2} \rbrace$$
So the surface area is near the equator(with respect to a porpotion to the length of
the axis).
\end{enumerate}
\vspace{10pt}

\item[-]
{\Large \textbf{Solution 2:}}(not very rigorous)
\begin{enumerate}
\item[1.]
same as solution 1.
\item[2.]
{\large \textbf{Proposition 1.}} When $d$ is large, an axis is almost contained in the
equator given by $\lbrace \bfvec{x}:\sum_{i=1}^{d}x_{i}=\frac{d}{2} \rbrace$ in a 
$d-$dimensional cube. Actually every axis is almost contained in the equator.

{\large \textbf{Proposition 2.}} The surface area of a $(d-1)-$dimensional side of a
$d-$dimensional cube is the volume of a $(d-1)-$dimensional cube.

{\large \textbf{Proposition 3.}} A $(d-2)-$dimensional "line" connecting two $(d-1)-$-
dimensional sides int a $d-$dimensional cube is an axis of the cube, and an equator
of a $(d-1)-$dimensional cube.
(These propositions won't be proved so this proof is not rigorous)

Given the above propositions, we only need to prove that the volume of a high
dimensional cube is concentrated at its equator.

Move the cube with its center at the origin. Each coordinate havs range
$[-\frac{1}{2},\frac{1}{2}]$. Let $C^{d}$ denote the cube. Set
$$S_{d}(\bfvec{x})=\sum_{i=1}^{d}x_{i}$$
and define the "equatorial slice" according to the question
$$S_{\epsilon}=\lbrace \bfvec{x}\in C^{d} | |\frac{S_{d}(\bfvec{x})}{d}|<\epsilon \rbrace$$
by the Weak Law of Large Numbers, we have
$$\lim_{d \to \infty} Volume(S_{\epsilon})=0$$
We notice
$$(S_{d}(\bfvec{x}))^2=\sum_{i=1}^{d}x_{i}^{2}+\sum_{i=1}^{d}\sum_{j=1,j\neq i}^{d}2x_{i}x_{j}$$
When we integrate over $C^{d}$, since
$$\int_{-\frac{1}{2}}^{\frac{1}{2}}x_{i}dx_{i}=0$$
and
$$\int_{C^{d}}x_{i}^{2}dx_{i}=\int_{-\frac{1}{2}}^{\frac{1}{1}} \int_{-\frac{1}{2}}^{\frac{1}{1}}... \int_{-\frac{1}{2}}^{\frac{1}{1}} x_{i}^{2}dx_{i}dx_{2}...dx_{d}=\frac{1}{12}$$
So $$\sigma^{2}=\frac{d}{12}$$ (this can be used to complete the proof in solution 1)

Applying Chebyshev's inequality yields
$$Volume({\bfvec{x}||S_{d}(\bfvec{x})|>\delta})<\frac{d}{12\delta^2}$$
$$Volume({\bfvec{x}||\frac{S_{d}(\bfvec{x})}{d}|>\epsilon})<\frac{1}{12n\epsilon^2}$$
which means that the volume of a $d-$dimensional cube is concentrated around the
equator. Thus by propositions above we have the surface area around the equator too.
(not very rigorous)

\end{enumerate}
\vspace{10pt}


\end{enumerate}
\newpage
\item[Ex 2.43]
{\Large \textbf{Solution:}}

The block of code for generating Gaussian is listed below.

(Method by Marsaglia and Bray)
\lstset{language=C++}
\lstset{breaklines}
\lstset{extendedchars=false}
\begin{lstlisting}

double gaussrand()
{
    static double V1, V2, S;
    static int phase = 0;
    double X;
     
    if ( phase == 0 ) {
        do {
            double U1 = (double)rand() / RAND_MAX;
            double U2 = (double)rand() / RAND_MAX;
             
            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
        } while(S >= 1 || S == 0);
         
        X = V1 * sqrt(-2 * log(S) / S);
    } else
        X = V2 * sqrt(-2 * log(S) / S);
         
    phase = 1 - phase;
 
    return X;
}

\end{lstlisting}
{\Large \textbf{Results:}}
\begin{enumerate}
\item[-]
{\Large Sum of squared error}

\begin{tabular}{c r @{.} l} 
Projected dimension
& 
\multicolumn{2}{c}{Squared Error} \\ 
\hline 
$100$ & 2217&43 \\ 
$50$  & 2331&43 \\ 
$10$  & 2742&93 \\ 
$5$   & 2801&21 \\
$4$   & 3194&08 \\
$3$   & 2786&73 \\
$2$   & 2984&44 \\
$1$   & 2492&76 \\
\end{tabular} 

The squared error didn't show the increasing property as expected. I think it might
be due to the small size of the set of points. So I tried generating more points.
When I generated 60 points the decreasing property shows up (except 1 dimension
because it's special).

\begin{tabular}{c r @{.} l}
Projected dimension
&
\multicolumn{2}{c}{Squared Error} \\
\hline
$100$ & 21853&8 \\
$50$  & 22044&6 \\
$10$  & 23443&5 \\
$5$   & 23887&5 \\
$4$   & 24459&8 \\
$3$   & 24178&8 \\
$2$   & 25311&2 \\
$1$   & 24107&1 \\
\end{tabular}


\item[-]
	{\Large Distribution of distances(190distances between 20 points)}

\begin{center}
	\includegraphics[height=5cm]{1000}

	original

	\includegraphics[height=5cm]{0100}

	projected to 100 dimensions.

	\includegraphics[height=5cm]{0050}

	projected to 50 dimensions.

	\includegraphics[height=5cm]{0010}

	projected to 10 dimensions.

	\includegraphics[height=5cm]{0005}

	projected to 5 dimensions.

	\includegraphics[height=5cm]{0004}

	projected to 4 dimensions.

	\includegraphics[height=5cm]{0003}

	projected to 3 dimensions.

	\includegraphics[height=5cm]{0002}

	projected to 2 dimensions.

	\includegraphics[height=5cm]{0001}

	projected to 1 dimension.
\end{center}
\end{enumerate}

\end{enumerate}
\end{document}

